{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('survey_results_public.csv')\n",
    "schema_df = pd.read_csv('survey_results_schema.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_columns',15)\n",
    "pd.set_option('display.max_rows',15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Respondent</th>\n",
       "      <th>Hobby</th>\n",
       "      <th>OpenSource</th>\n",
       "      <th>Country</th>\n",
       "      <th>Student</th>\n",
       "      <th>Employment</th>\n",
       "      <th>FormalEducation</th>\n",
       "      <th>...</th>\n",
       "      <th>EducationParents</th>\n",
       "      <th>RaceEthnicity</th>\n",
       "      <th>Age</th>\n",
       "      <th>Dependents</th>\n",
       "      <th>MilitaryUS</th>\n",
       "      <th>SurveyTooLong</th>\n",
       "      <th>SurveyEasy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>No</td>\n",
       "      <td>Kenya</td>\n",
       "      <td>No</td>\n",
       "      <td>Employed part-time</td>\n",
       "      <td>Bachelor’s degree (BA, BS, B.Eng., etc.)</td>\n",
       "      <td>...</td>\n",
       "      <td>Bachelor’s degree (BA, BS, B.Eng., etc.)</td>\n",
       "      <td>Black or of African descent</td>\n",
       "      <td>25 - 34 years old</td>\n",
       "      <td>Yes</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The survey was an appropriate length</td>\n",
       "      <td>Very easy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Yes</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>No</td>\n",
       "      <td>Employed full-time</td>\n",
       "      <td>Bachelor’s degree (BA, BS, B.Eng., etc.)</td>\n",
       "      <td>...</td>\n",
       "      <td>Bachelor’s degree (BA, BS, B.Eng., etc.)</td>\n",
       "      <td>White or of European descent</td>\n",
       "      <td>35 - 44 years old</td>\n",
       "      <td>Yes</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The survey was an appropriate length</td>\n",
       "      <td>Somewhat easy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Yes</td>\n",
       "      <td>United States</td>\n",
       "      <td>No</td>\n",
       "      <td>Employed full-time</td>\n",
       "      <td>Associate degree</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>No</td>\n",
       "      <td>No</td>\n",
       "      <td>United States</td>\n",
       "      <td>No</td>\n",
       "      <td>Employed full-time</td>\n",
       "      <td>Bachelor’s degree (BA, BS, B.Eng., etc.)</td>\n",
       "      <td>...</td>\n",
       "      <td>Some college/university study without earning ...</td>\n",
       "      <td>White or of European descent</td>\n",
       "      <td>35 - 44 years old</td>\n",
       "      <td>No</td>\n",
       "      <td>No</td>\n",
       "      <td>The survey was an appropriate length</td>\n",
       "      <td>Somewhat easy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>Yes</td>\n",
       "      <td>No</td>\n",
       "      <td>South Africa</td>\n",
       "      <td>Yes, part-time</td>\n",
       "      <td>Employed full-time</td>\n",
       "      <td>Some college/university study without earning ...</td>\n",
       "      <td>...</td>\n",
       "      <td>Some college/university study without earning ...</td>\n",
       "      <td>White or of European descent</td>\n",
       "      <td>18 - 24 years old</td>\n",
       "      <td>Yes</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The survey was an appropriate length</td>\n",
       "      <td>Somewhat easy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 129 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Respondent Hobby OpenSource         Country         Student  \\\n",
       "0           1   Yes         No           Kenya              No   \n",
       "1           3   Yes        Yes  United Kingdom              No   \n",
       "2           4   Yes        Yes   United States              No   \n",
       "3           5    No         No   United States              No   \n",
       "4           7   Yes         No    South Africa  Yes, part-time   \n",
       "\n",
       "           Employment                                    FormalEducation  ...  \\\n",
       "0  Employed part-time           Bachelor’s degree (BA, BS, B.Eng., etc.)  ...   \n",
       "1  Employed full-time           Bachelor’s degree (BA, BS, B.Eng., etc.)  ...   \n",
       "2  Employed full-time                                   Associate degree  ...   \n",
       "3  Employed full-time           Bachelor’s degree (BA, BS, B.Eng., etc.)  ...   \n",
       "4  Employed full-time  Some college/university study without earning ...  ...   \n",
       "\n",
       "                                    EducationParents  \\\n",
       "0           Bachelor’s degree (BA, BS, B.Eng., etc.)   \n",
       "1           Bachelor’s degree (BA, BS, B.Eng., etc.)   \n",
       "2                                                NaN   \n",
       "3  Some college/university study without earning ...   \n",
       "4  Some college/university study without earning ...   \n",
       "\n",
       "                  RaceEthnicity                Age Dependents MilitaryUS  \\\n",
       "0   Black or of African descent  25 - 34 years old        Yes        NaN   \n",
       "1  White or of European descent  35 - 44 years old        Yes        NaN   \n",
       "2                           NaN                NaN        NaN        NaN   \n",
       "3  White or of European descent  35 - 44 years old         No         No   \n",
       "4  White or of European descent  18 - 24 years old        Yes        NaN   \n",
       "\n",
       "                          SurveyTooLong     SurveyEasy  \n",
       "0  The survey was an appropriate length      Very easy  \n",
       "1  The survey was an appropriate length  Somewhat easy  \n",
       "2                                   NaN            NaN  \n",
       "3  The survey was an appropriate length  Somewhat easy  \n",
       "4  The survey was an appropriate length  Somewhat easy  \n",
       "\n",
       "[5 rows x 129 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Column</th>\n",
       "      <th>QuestionText</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Respondent</td>\n",
       "      <td>Randomized respondent ID number (not in order ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Hobby</td>\n",
       "      <td>Do you code as a hobby?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>OpenSource</td>\n",
       "      <td>Do you contribute to open source projects?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Country</td>\n",
       "      <td>In which country do you currently reside?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Student</td>\n",
       "      <td>Are you currently enrolled in a formal, degree...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>124</td>\n",
       "      <td>Age</td>\n",
       "      <td>What is your age? If you prefer not to answer,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>125</td>\n",
       "      <td>Dependents</td>\n",
       "      <td>Do you have any children or other dependents t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>126</td>\n",
       "      <td>MilitaryUS</td>\n",
       "      <td>Are you currently serving or have you ever ser...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>127</td>\n",
       "      <td>SurveyTooLong</td>\n",
       "      <td>How do you feel about the length of the survey...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>128</td>\n",
       "      <td>SurveyEasy</td>\n",
       "      <td>How easy or difficult was this survey to compl...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>129 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            Column                                       QuestionText\n",
       "0       Respondent  Randomized respondent ID number (not in order ...\n",
       "1            Hobby                            Do you code as a hobby?\n",
       "2       OpenSource         Do you contribute to open source projects?\n",
       "3          Country          In which country do you currently reside?\n",
       "4          Student  Are you currently enrolled in a formal, degree...\n",
       "..             ...                                                ...\n",
       "124            Age  What is your age? If you prefer not to answer,...\n",
       "125     Dependents  Do you have any children or other dependents t...\n",
       "126     MilitaryUS  Are you currently serving or have you ever ser...\n",
       "127  SurveyTooLong  How do you feel about the length of the survey...\n",
       "128     SurveyEasy  How easy or difficult was this survey to compl...\n",
       "\n",
       "[129 rows x 2 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "schema_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "person = {\n",
    "    \"first\": \"Corey\", \n",
    "    \"last\": \"Schafer\", \n",
    "    \"email\": \"CoreyMSchafer@gmail.com\"\n",
    "}\n",
    "\n",
    "\n",
    "people = {\n",
    "    \"first\": [\"Corey\"], \n",
    "    \"last\": [\"Schafer\"], \n",
    "    \"email\": [\"CoreyMSchafer@gmail.com\"]\n",
    "}\n",
    "\n",
    "\n",
    "people = {\n",
    "    \"first\": [\"Corey\", 'Jane', 'John'], \n",
    "    \"last\": [\"Schafer\", 'Doe', 'Doe'], \n",
    "    \"email\": [\"CoreyMSchafer@gmail.com\", 'JaneDoe@email.com', 'JohnDoe@email.com']\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people['email']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(people)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>first</th>\n",
       "      <th>last</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Corey</td>\n",
       "      <td>Schafer</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>John</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JohnDoe@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   first     last                    email\n",
       "0  Corey  Schafer  CoreyMSchafer@gmail.com\n",
       "1   Jane      Doe        JaneDoe@email.com\n",
       "2   John      Doe        JohnDoe@email.com"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    CoreyMSchafer@gmail.com\n",
       "1          JaneDoe@email.com\n",
       "2          JohnDoe@email.com\n",
       "Name: email, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['email']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df['email'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    CoreyMSchafer@gmail.com\n",
       "1          JaneDoe@email.com\n",
       "2          JohnDoe@email.com\n",
       "Name: email, dtype: object"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " df.email"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>last</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Schafer</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JohnDoe@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      last                    email\n",
       "0  Schafer  CoreyMSchafer@gmail.com\n",
       "1      Doe        JaneDoe@email.com\n",
       "2      Doe        JohnDoe@email.com"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['last','email']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['first', 'last', 'email'], dtype='object')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "first                      Corey\n",
       "last                     Schafer\n",
       "email    CoreyMSchafer@gmail.com\n",
       "Name: 0, dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0] # first row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>first</th>\n",
       "      <th>last</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Corey</td>\n",
       "      <td>Schafer</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   first     last                    email\n",
       "0  Corey  Schafer  CoreyMSchafer@gmail.com\n",
       "1   Jane      Doe        JaneDoe@email.com"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[[0,1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    CoreyMSchafer@gmail.com\n",
       "1          JaneDoe@email.com\n",
       "Name: email, dtype: object"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[[0,1],2] # get first and second row's email attr "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>first</th>\n",
       "      <th>last</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Corey</td>\n",
       "      <td>Schafer</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>John</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JohnDoe@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   first     last                    email\n",
       "0  Corey  Schafer  CoreyMSchafer@gmail.com\n",
       "1   Jane      Doe        JaneDoe@email.com\n",
       "2   John      Doe        JohnDoe@email.com"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "first                      Corey\n",
       "last                     Schafer\n",
       "email    CoreyMSchafer@gmail.com\n",
       "Name: 0, dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>first</th>\n",
       "      <th>last</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Corey</td>\n",
       "      <td>Schafer</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Doe</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   first     last                    email\n",
       "0  Corey  Schafer  CoreyMSchafer@gmail.com\n",
       "1   Jane      Doe        JaneDoe@email.com"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[[0,1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    CoreyMSchafer@gmail.com\n",
       "1          JaneDoe@email.com\n",
       "Name: email, dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[[0,1],'email']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>email</th>\n",
       "      <th>last</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>CoreyMSchafer@gmail.com</td>\n",
       "      <td>Schafer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>JaneDoe@email.com</td>\n",
       "      <td>Doe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     email     last\n",
       "0  CoreyMSchafer@gmail.com  Schafer\n",
       "1        JaneDoe@email.com      Doe"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[[0,1],['email','last']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('survey_results_public.csv')\n",
    "schema_df = pd.read_csv('survey_results_schema.csv')\n",
    "\n",
    "pd.set_option('display.max_columns',15)\n",
    "pd.set_option('display.max_rows',15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(98855, 129)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Respondent', 'Hobby', 'OpenSource', 'Country', 'Student', 'Employment',\n",
       "       'FormalEducation', 'UndergradMajor', 'CompanySize', 'DevType',\n",
       "       ...\n",
       "       'Exercise', 'Gender', 'SexualOrientation', 'EducationParents',\n",
       "       'RaceEthnicity', 'Age', 'Dependents', 'MilitaryUS', 'SurveyTooLong',\n",
       "       'SurveyEasy'],\n",
       "      dtype='object', length=129)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        Yes\n",
       "1        Yes\n",
       "2        Yes\n",
       "3         No\n",
       "4        Yes\n",
       "        ... \n",
       "98850    Yes\n",
       "98851     No\n",
       "98852    Yes\n",
       "98853    Yes\n",
       "98854    Yes\n",
       "Name: Hobby, Length: 98855, dtype: object"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Hobby']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Yes    79897\n",
       "No     18958\n",
       "Name: Hobby, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Hobby'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Respondent                                          1\n",
       "Hobby                                             Yes\n",
       "OpenSource                                         No\n",
       "Country                                         Kenya\n",
       "Student                                            No\n",
       "                                 ...                 \n",
       "Age                                 25 - 34 years old\n",
       "Dependents                                        Yes\n",
       "MilitaryUS                                        NaN\n",
       "SurveyTooLong    The survey was an appropriate length\n",
       "SurveyEasy                                  Very easy\n",
       "Name: 0, Length: 129, dtype: object"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Yes'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[0,'Hobby']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    Yes\n",
       "1    Yes\n",
       "2    Yes\n",
       "Name: Hobby, dtype: object"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[[0,1,2],'Hobby']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    Yes\n",
       "1    Yes\n",
       "2    Yes\n",
       "Name: Hobby, dtype: object"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[0:2,'Hobby']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hobby</th>\n",
       "      <th>OpenSource</th>\n",
       "      <th>Country</th>\n",
       "      <th>Student</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Yes</td>\n",
       "      <td>No</td>\n",
       "      <td>Kenya</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Yes</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Yes</td>\n",
       "      <td>United States</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Hobby OpenSource         Country Student\n",
       "0   Yes         No           Kenya      No\n",
       "1   Yes        Yes  United Kingdom      No\n",
       "2   Yes        Yes   United States      No"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[0:2,'Hobby':'Student']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
